#This file creates figure 1 for Dahlgaard (2017)

load("agg_day_year_all.rdata")
load("n_day_year_all.rdata")

## commented out for replication files 
#  install.packages("xtable")
#  install.packages("ggplot2")
#  install.packages("dplyr")
#  install.packages("rmeta")
#  install.packages("rdrobust")

library(xtable)
library(ggplot2)
library(dplyr)
library(rmeta)
library(rdrobust)


# create large data frame from aggregated data

data <- left_join(data_day_year, data_n_year,
                  by = c("days", "year"))

data_close <- 
  data %>%
  mutate(voted     = round(par_vote*n),
         abstained = round((1-par_vote)*n),
         treated   = days > 0) %>% 
  select(year, days, treated, voted, abstained)

data_voted <- 
  as.data.frame(lapply(data_close, 
                       function(x,p) rep(x,p), 
                       data_close[["voted"]])) %>%
  mutate(voted = 1)

data_abstained <- 
  as.data.frame(lapply(data_close, 
                       function(x,p) rep(x,p), 
                       data_close[["abstained"]])) %>%
  mutate(voted = 0)

data <- 
  rbind(data_voted, data_abstained)

# run and store models 

models_list <- list()

year <- c(2009, 2013, 2014, 2015)

for(i in 1:4){
  # Subset to yearly data frames
  year_vec <- year[i]
  data_year <- 
    data %>%
    ungroup() %>%
    mutate(days = days) %>% 
    filter(year == year_vec )
  
  # run rdrobust model and store it in list
  models_list[[i]] <- rdrobust(data_year$voted, data_year$days)
  print(i)
}

data_plot <- NULL # create empty object

for(i in 1:4){

  year_vec <- year[i] # create number for year
  # subset to data from year and with observations within rdrobust bandwidth
  bw       <- models_list[[i]]$h_l
  sub_data <- 
    data %>% 
    filter(year == year_vec & abs(days) < bw) %>% 
    mutate(weight = 1 - abs(days) / bw, #assign triangular weights
           treated = days > 0) # create treatment dummy
  
  # estimate WLS model (same as estimated by rdrobust)
  mod <- lm(voted ~ treated*days, data = sub_data, weights = weight) 

  # create data frame with one observation per day within bandwidth
  new_data <- 
    data.frame(days = seq(- floor(bw),
                          ceiling(bw), 1) - 0.5) %>% 
    mutate(treated = days > 0) %>% 
    filter(abs(days) < bw)
  
  # predict values for each day  
  new_data$predicts <- predict.lm(mod, newdata = new_data)
  
  # take aggregated data for the year and join with predicted data
  data_plot_year <- 
    data_day_year %>%
    filter(year == year_vec) %>% 
    select(days, year, par_vote) %>%
    left_join(., new_data, by = "days")
  
  # store in data for plotting 
  data_plot <- rbind(data_plot, data_plot_year)
  
}

# subset to data within ten day bw
data_plot_10 <- 
  data_plot %>% 
  filter(abs(days) < 10) %>%
  mutate(window = "Observations within ten days") 

# subset to data within estimation windows
data_plot <-
  data_plot %>% 
  filter(!is.na(predicts)) %>%
  mutate(window = "Observations within six months")

# bind data frames 
data_plot <- 
  rbind(data_plot, data_plot_10)

# subset data frame above cutoff
data_plot_pos <- 
  data_plot %>% 
  filter(treated == TRUE)

# subset data frame below cutoff
data_plot_neg <- 
  data_plot %>% 
  filter(treated == FALSE)

# plot 
plot <- 
  ggplot(data_plot, 
         aes(x = days, y = 100*par_vote)) + 
  geom_point(alpha = 0.4) + 
  facet_grid(year ~ window, scales = "free") + 
  theme_classic() + 
  geom_smooth(data = data_plot_pos,  # add lines above cutoff
              aes(x = days, y = 100*predicts), 
              fill = NA, 
              method = lm, 
              color = "black") +
  geom_smooth(data = data_plot_neg,  # add lines below cutoff
              aes(x = days, 
                  y = 100*predicts), 
              fill = NA, 
              method = lm, 
              color = "black") + 
  ylab("Parents' turnout rate (%)") +
  xlab("Days from cutoff") + 
  scale_y_continuous(limits = c(50, 95), breaks = seq(50, 90, 10)) + 
  theme(panel.spacing = unit(1, "lines"))



